
########################################################
####    Compare treatment effects by match status    ###
########################################################

# This file tests for differences in treatment effects between the matched and full samples
# on a list of variables other than the outcomes of interest.

# That is:
# for any variable, calculate the treatment-control difference 
# for the matched sample and the full sample separately
# and export the difference between the effects (and standard error)

#load packages
library(readstata13)
library(dplyr)
library(here)

i_am("2_match_comparison.R")

## LOAD AND CLEAN ####

# load dataset
dat <- read.csv(here("combined_dataset.csv"))

# create match indicator
dat <- dat %>% 
  mutate(matched = case_when(is.na(posterior)~0,
                             T~1))

# code gender indicator
dat <- dat %>%
  mutate(sex = case_when(x_f_ch_male==1|x_f_ad_male==1~"M",
                         x_f_ch_male==0|x_f_ad_male==0~"F",
                         is.na(x_f_ch_male)&is.na(x_f_ad_male)&f_svy_gender=="M"~"M",
                         is.na(x_f_ch_male)&is.na(x_f_ad_male)&f_svy_gender=="F"~"F"))

# code age group indicator
dat <- dat %>%
  mutate(age_group = case_when((ra_year - f_svy_yob_imp_ytgc)>=13~"old_kid",
                               (ra_year - f_svy_yob_imp_ytgc)<13~"young_kid",
                               f_svy_sample2007 %in% c("AD", "ES")~"adult"))
## Create function for differences ####

compare_mto <- function(var,
                        cat=NA,
                        catname,
                        survey,
                        sample,
                        continuous=FALSE){
  
  #look only at control and experimental groups
  sub <- dat[dat$ra_group_factor!="section 8",]
  
  # set the variable to test for differences on
  if(continuous==FALSE){
    sub$var <- ifelse(sub[,var]==cat, 1, 0)
  }
  if(continuous==TRUE){
    sub$var <- sub[,var]
  }
  
  # select the appropriate weight
  if(survey=="final"&sample=="adult"){
    sub <- sub[sub$f_svy_sample2007=="AD",]
    sub$wt <- sub$f_wt_totsvy_ad
  }
  
  if(survey=="final"&sample=="youth"){
    sub <- sub[sub$f_svy_sample2007=="YT",]
    sub$wt <- sub$f_wt_totsvy
  }
  
  if(survey=="roster"&sample=="roster"){
    sub <- sub
    sub$wt <- sub$f_wt_totcore98
  }
  
  if(survey=="final"&sample=="roster"){
    sub <- sub
    sub$wt <- sub$f_wt_totsvy
  }
  
  if(survey=="final"&sample=="report"){
    sub <- sub
    sub$wt <- sub$f_wt_totcore98
  }
  
  # test for treatment effects in full sample
  mod1 <- lm(formula = var ~ ra_group_factor, 
             data = sub, weights = wt)
  # test for treatment effects in matched sample
  mod2 <- lm(formula = var ~ ra_group_factor, 
             data = sub[sub$matched==1,], weights = wt)
  
  # record full sample difference
  full_exp <- as.numeric(mod1$coefficients[1] + mod1$coefficients[2])
  full_con <- as.numeric(mod1$coefficients[1])
  full_diff <- full_exp - full_con
  
  # record matched sample difference
  mat_exp <- as.numeric(mod2$coefficients[1] + mod2$coefficients[2])
  mat_con <- as.numeric(mod2$coefficients[1])
  mat_diff <- mat_exp - mat_con
  
  # record difference in differences
  diff_diff <- full_diff - mat_diff
  # calculate standard error
  diff_se <- sqrt(summary(mod1)$coefficients[2,2]^2 + 
                    summary(mod2)$coefficients[2,2]^2)
  # calculate t-statistic
  diff_t <- diff_diff/diff_se
  
  # export results
  return(data.frame("varname"=var,
                    "catname"=catname,
                    "full_exp"=full_exp,
                    "full_con"=full_con,
                    "full_diff"=full_diff,
                    "mat_exp"=mat_exp,
                    "mat_con"=mat_con,
                    "mat_diff"=mat_diff,
                    "diff_diff"=diff_diff,
                    "diff_t"=diff_t))
  
}



## Repeat for each set of variables and samples: ####
## Adult final ####

# marital status
tab1 <- compare_mto(var="hrl1",
                    catname="married",
                    cat=1,
                    survey="final",
                    sample="adult")

# frequency of visiting with friends
tab1 <- bind_rows(tab1, compare_mto(var="hsn21",
                                    cat=7,
                                    catname="no friend visits",
                                    survey="final",
                                    sample="adult"))

# church attendance 
tab1 <- bind_rows(tab1, compare_mto(var="hsn31",
                                    cat=1,
                                    catname="no church",
                                    survey="final",
                                    sample="adult"))

# whether respondent has seen drugs in neighborhood
tab1 <- bind_rows(tab1, compare_mto(var="hnb3_b_1",
                                    cat=1,
                                    catname="seen drugs",
                                    survey="final",
                                    sample="adult"))

# how safe respondent feels in neighborhood 
tab1 <- bind_rows(tab1, compare_mto(var="hnb9b_b_1",
                                    cat=1,
                                    catname="very safe",
                                    survey="final",
                                    sample="adult"))

# whether trash in neighborhood is a big problem
tab1 <- bind_rows(tab1, compare_mto(var="hnb2a_b_1",
                                    cat=1,
                                    catname="trash a big prob",
                                    survey="final",
                                    sample="adult"))

# whether grafitti in neighborhood is a big problem
tab1 <- bind_rows(tab1, compare_mto(var="hnb2b_b_1",
                                    cat=1,
                                    catname="graffiti a big prob",
                                    survey="final",
                                    sample="adult"))

# whether abandoned buildings in neighborhood are a big problem
tab1 <- bind_rows(tab1, compare_mto(var="hnb2d_b_1",
                                    cat=1,
                                    catname="aband. builds big prob",
                                    survey="final",
                                    sample="adult"))

# experience with racist treatment
tab1 <- bind_rows(tab1, compare_mto(var="hnb6b",
                                    cat=1,
                                    catname="racist treatment",
                                    survey="final",
                                    sample="adult"))

# experience with discrimination by the police
tab1 <- bind_rows(tab1, compare_mto(var="hnb6d",
                                    cat=1,
                                    catname="police discrim",
                                    survey="final",
                                    sample="adult"))

# satisfaction with neighborhood
tab1 <- bind_rows(tab1, compare_mto(var="hnb1_b_1",
                                    cat=1,
                                    catname="v satisf w nbhd",
                                    survey="final",
                                    sample="adult"))

# belief police will come when called
tab1 <- bind_rows(tab1, compare_mto(var="hnb2f_b_1",
                                    cat=1,
                                    catname="police don't come",
                                    survey="final",
                                    sample="adult"))


## Youth final ####

# whether subject has children living with them
tab1 <- bind_rows(tab1, compare_mto(var="yhl1d",
                                    cat=1,
                                    catname="children live w me",
                                    survey="final",
                                    sample="youth"))

# frequency of visiting with friends
tab1 <- bind_rows(tab1, compare_mto(var="ysn12",
                                    cat=1,
                                    catname="visit friends daily",
                                    survey="final",
                                    sample="youth"))

# church attendance
tab1 <- bind_rows(tab1, compare_mto(var="ysn13",
                                    cat=4,
                                    catname="attend youth church never",
                                    survey="final",
                                    sample="youth"))

# satisfaction with neighborhood
tab1 <- bind_rows(tab1, compare_mto(var="ynb2_b_1",
                                    cat=1,
                                    catname="v satisf w nbhd",
                                    survey="final",
                                    sample="youth"))

# whether neighborhood feels safe at night
tab1 <- bind_rows(tab1, compare_mto(var="ynb3b_b_1",
                                    cat=1,
                                    catname="nbhd v safe at night",
                                    survey="final",
                                    sample="youth"))

# whether police have treated respondent unfairly
tab1 <- bind_rows(tab1, compare_mto(var="ynb4e",
                                    cat=1,
                                    catname="police treated unfairly",
                                    survey="final",
                                    sample="youth"))

# whether respondent has seen a drug use or sale recently
tab1 <- bind_rows(tab1, compare_mto(var="ynb6_b_1",
                                    cat=1,
                                    catname="saw drug use or sale",
                                    survey="final",
                                    sample="youth"))

# whether respondent received HS diploma
tab1 <- bind_rows(tab1, compare_mto(var="yed3c",
                                    cat=1,
                                    catname="received hs diploma",
                                    survey="final",
                                    sample="youth"))

# employment status 
tab1 <- bind_rows(tab1, compare_mto(var="yem1",
                                    cat=1,
                                    catname="worked for pay",
                                    survey="final",
                                    sample="youth"))

# welfare receipt
tab1 <- bind_rows(tab1, compare_mto(var="yrb35_u34",
                                    cat=1,
                                    catname="received welfare",
                                    survey="final",
                                    sample="youth"))

## Roster variables ####

# employment status
tab1 <- bind_rows(tab1, compare_mto(var="hho5",
                                    cat=1,
                                    catname="working full time",
                                    survey="roster",
                                    sample="roster"))

# relationship status
tab1 <- bind_rows(tab1, compare_mto(var="hho6",
                                    cat=1,
                                    catname="single",
                                    survey="roster",
                                    sample="roster"))

# number of children
tab1 <- bind_rows(tab1, compare_mto(var="hho7",
                                    cat=0,
                                    catname="no bio children",
                                    survey="roster",
                                    sample="roster"))

# spent time in jail or prison
tab1 <- bind_rows(tab1, compare_mto(var="hho10a",
                                    cat=1,
                                    catname="time in jail or prison",
                                    survey="roster",
                                    sample="roster"))

## Final Analysis Variables ####

# birth year
tab1 <- bind_rows(tab1, compare_mto(var="f_svy_yob_imp_ytgc",
                                    continuous=TRUE,
                                    catname="year of birth",
                                    survey="final",
                                    sample="roster"))

# receipt of TANF
tab1 <- bind_rows(tab1, compare_mto(var="f_in_tanf_fam",
                                    cat=1,
                                    catname="receiving TANF",
                                    survey="final",
                                    sample="roster"))

# presence of behavioral problems
tab1 <- bind_rows(tab1, compare_mto(var="x_f_c1_behprb",
                                    cat=1,
                                    catname="behavioral prob",
                                    survey="final",
                                    sample="report"))

# whether expelled from school
tab1 <- bind_rows(tab1, compare_mto(var="x_f_c1_expel",
                                    cat=1,
                                    catname="expelled",
                                    survey="final",
                                    sample="report"))

# whether designated gifted in school
tab1 <- bind_rows(tab1, compare_mto(var="x_f_c1_gifted",
                                    cat=1,
                                    catname="gifted",
                                    survey="final",
                                    sample="report"))

# whether diagnosed with a learning problem in school
tab1 <- bind_rows(tab1, compare_mto(var="x_f_c1_lrnprb",
                                    cat=1,
                                    catname="learning prob",
                                    survey="final",
                                    sample="report"))

# census tract share of professional workers
tab1 <- bind_rows(tab1, compare_mto(var="f_c9010t_histatus_dw",
                                    continuous=1,
                                    catname="share prof workers",
                                    survey="roster",
                                    sample="roster"))

# census track share black 
tab1 <- bind_rows(tab1, compare_mto(var="f_c9010t_pblack_dw",
                                    continuous=1,
                                    catname="share black",
                                    survey="roster",
                                    sample="roster"))

# census tract share college grads
tab1 <- bind_rows(tab1, compare_mto(var="f_c9010t_pcolldeg_dw",
                                    continuous=1,
                                    catname="share college grads",
                                    survey="roster",
                                    sample="roster"))

# census tract share employed
tab1 <- bind_rows(tab1, compare_mto(var="f_c9010t_pemp_dw",
                                    continuous=1,
                                    catname="share employed",
                                    survey="roster",
                                    sample="roster"))

# census tract percent below poverty line
tab1 <- bind_rows(tab1, compare_mto(var="f_c9010t_perpov_dw",
                                    continuous=1,
                                    catname="poverty",
                                    survey="roster",
                                    sample="roster"))

# census tract share hispanic
tab1 <- bind_rows(tab1, compare_mto(var="f_c9010t_phisp_dw",
                                    continuous=1,
                                    catname="share Hispanic",
                                    survey="roster",
                                    sample="roster"))

# census tract share female-headed households
tab1 <- bind_rows(tab1, compare_mto(var="f_c9010t_pfsfem_dw",
                                    continuous=1,
                                    catname="share female headed",
                                    survey="roster",
                                    sample="roster"))

# census tract share racial minority
tab1 <- bind_rows(tab1, compare_mto(var="f_c9010t_pminorty_dw",
                                    continuous=1,
                                    catname="share minority",
                                    survey="roster",
                                    sample="roster"))

# census tract share on welfare
tab1 <- bind_rows(tab1, compare_mto(var="f_c9010t_pwelf_dw",
                                    continuous=1,
                                    catname="share welfare",
                                    survey="roster",
                                    sample="roster"))


### SUBSET to age groups, repeat results ####

# now repeat this process to get results separately for each age group

# RUN the LOAD AND CLEAN block
# limit the sample to adults:
dat <- dat %>% filter(age_group=="adult")
tab1 <- data.frame(NULL)

# RUN the Adult final through Final Analysis blocks

#save out the results
tab_all <- tab1
tab_all$sample <- "adults"
write.csv(tab_all, "~match_check_adults_exp.csv")


# RUN the LOAD AND CLEAN block
# subset to teens at random assignment:
dat <- dat %>% filter(age_group=="old_kid")
tab1 <- data.frame(NULL)
# RUN the Adult final through Final Analysis blocks
tab_all <- tab1
tab_all$sample <- "older"

# RUN the LOAD AND CLEAN block
# subset to kids at random assignment:
dat <- dat %>% filter(age_group=="young_kid")
tab1 <- data.frame(NULL)
# RUN the Adult final through Final Analysis blocks
tab1$sample <- "younger"

# bind the teen and youth results together
tab_all <- bind_rows(tab_all, tab1)

# export results
write.csv(tab_all, "~/Desktop/match_check_kids_exp.csv")


### REDEFINE FUNCTION for section 8, repeat ####

# to obtain results for section 8:
# run the LOAD AND CLEAN block
# instead of running the "Create function" block:
# run this block to create a version of the function comparing the section 8 condition to control
# rename the files being exported to refer to section 8 (i.e. replace _exp with _s8.csv)
# follow the steps in the SUBSET blocks to get results by age group

compare_mto <- function(var,
                        cat=NA,
                        catname,
                        survey,
                        sample,
                        continuous=FALSE){
  
  sub <- dat[dat$ra_group_factor!="experimental",]
  
  if(continuous==FALSE){
    sub$var <- ifelse(sub[,var]==cat, 1, 0)
  }
  if(continuous==TRUE){
    sub$var <- sub[,var]
  }
  
  
  if(survey=="final"&sample=="adult"){
    sub <- sub[sub$f_svy_sample2007=="AD",]
    sub$wt <- sub$f_wt_totsvy_ad
  }
  
  if(survey=="final"&sample=="youth"){
    sub <- sub[sub$f_svy_sample2007=="YT",]
    sub$wt <- sub$f_wt_totsvy
  }
  
  if(survey=="roster"&sample=="roster"){
    sub <- sub
    sub$wt <- sub$f_wt_totcore98
  }
  
  if(survey=="final"&sample=="roster"){
    sub <- sub
    sub$wt <- sub$f_wt_totsvy
  }
  
  if(survey=="final"&sample=="report"){
    sub <- sub
    sub$wt <- sub$f_wt_totcore98
  }
  
  mod1 <- lm(formula = var ~ ra_group_factor, 
             data = sub, weights = wt)
  mod2 <- lm(formula = var ~ ra_group_factor, 
             data = sub[sub$matched==1,], weights = wt)
  
  full_exp <- as.numeric(mod1$coefficients[1] + mod1$coefficients[2])
  full_con <- as.numeric(mod1$coefficients[1])
  full_diff <- full_exp - full_con
  
  mat_exp <- as.numeric(mod2$coefficients[1] + mod2$coefficients[2])
  mat_con <- as.numeric(mod2$coefficients[1])
  mat_diff <- mat_exp - mat_con
  
  diff_diff <- full_diff - mat_diff
  diff_se <- sqrt(summary(mod1)$coefficients[2,2]^2 + 
                    summary(mod2)$coefficients[2,2]^2)
  diff_t <- diff_diff/diff_se
  
  return(data.frame("varname"=var,
                    "catname"=catname,
                    "full_exp"=full_exp,
                    "full_con"=full_con,
                    "full_diff"=full_diff,
                    "mat_exp"=mat_exp,
                    "mat_con"=mat_con,
                    "mat_diff"=mat_diff,
                    "diff_diff"=diff_diff,
                    "diff_t"=diff_t))
  
}

# Create figure with results ####

# import results from sample checks
dat <- read.csv("match_check_adults_exp.csv") 
dat$sample <- "adults"
dat$abs_t <- abs(dat$diff_t)

dat$cond <- "exp"

datb <- read.csv("match_check_kids_exp.csv")
datb$abs_t <- abs(datb$diff_t)
datb$cond <- "exp"

datc <- read.csv("match_check_adults_s8.csv") 
datc$abs_t <- abs(datc$diff_t)
datc$sample <- "adults"
datc$cond <- "s8"

datd <- read.csv("match_check_kids_s8.csv")
datd$abs_t <- abs(datd$diff_t)
datd$cond <- "s8"

# combine datasets
dat <- bind_rows(dat, datb, datc, datd)
rm(datb, datc, datd)
# remove duplicates
dat <- dat %>% distinct(abs_t,.keep_all = TRUE)

# create figure 3 in main text
a <- ggplot(dat) +
  geom_density(aes(x=abs_t, group=sample, fill=sample), binwidth=.1, color="black", alpha=.7) +
  theme_bw() +
  scale_fill_manual(name="Sample", 
                    labels=c("Adults", "Older Children", "Younger Children"),
                    values=c("black", "gray50", "gray90")) +
  geom_vline(xintercept=1.96, linetype=2) +
  xlab("Absolute Value of T-Statistic") + ylab("Density") + 
  ggtitle("Distribution of Absolute T-Stats: \n Difference in ATE between Matched and Full Samples") +
  theme(text=element_text(size=15)) + #coord_cartesian(xlim=c(0,2.2))
  xlim(c(0,2.2))

# export figure
pdf("Figures/figure3.pdf", width=10, height=6)
a
dev.off()


# create section 8 version
ggplot(dat) +
  geom_histogram(aes(x=abs_t, group=sample, fill=sample), binwidth=.15, color="black", alpha=.3) +
  theme_bw() +
  scale_fill_manual(name="Sample", 
                    labels=c("Adults", "Older Children", "Younger Children"),
                    values=c("red", "blue", "darkturquoise")) +
  geom_vline(xintercept=1.96, linetype=2) +
  xlab("Absolute Value of T-Statistic") + ylab("Density") + 
  ggtitle("Distribution of Absolute T-Stats: \n Difference in ATE between Matched and Full Samples") +
  theme(text=element_text(size=15)) + coord_cartesian(xlim=c(0,2.1))


